********************************************************************************
** Cederman, Galano, Girardin and Schvitz. War Did Make States.
** Article prepared for International Organization
** June 20, 2022
**
** Stata do-file: data_prep_dyadic_cent.do
** Data preparation file for dyad-level data (Centennia)
** Required file paths set in runall.do 
** (to be run after data_prep1 but before data_prep2)
********************************************************************************

cd $ROOT
cd $INPUTDIR

// Load the raw dyad-level input (file "AbramsonClippedDyadic")
insheet using $INPUTFILED, clear

// Dyad identifiers:
//   idd  - directed id built as ida*1000 + idb
//   idd2 - same, except that the smaller state id comes first when idb < ida
generate idd  = ida*1000 + idb
generate idd2 = cond(idb != . & idb < ida, idb*1000 + ida, idd)

sort idd year

// Distance covariates: log(1 + mindistance) and a dummy for mindistance < 10
// (a missing mindistance yields neigh = 0)
generate ldist = log(mindistance + 1)
generate neigh = (mindistance < 10)


// Merge in state-level data for State A.
// The old-style -merge- syntax used here requires the master data to be sorted
// on the key (id year) and KEEPS THE MASTER COPY of every non-key variable that
// exists in both datasets.
gen id = ida
sort id year
cd $ROOT
cd $INTERMEDIATEDIR
merge id year using statedata_intermediate
drop _merge

// Side-A copies of the state-level variables
gen areaa = area
gen popa = population
gen lareaa = log(areaa)
gen lpopa = log(popa)

gen growthwara = growthwarbreckearea
gen growthpeacea = growthpeacebreckearea
gen shrinkwara = shrinkwarbreckearea
gen shrinkpeacea = shrinkpeacebreckearea

// Missing elevation s.d. is recoded to zero before the side-A copy is taken
replace elevationsd = 0 if elevationsd == .
gen elevsda = elevationsd
gen deatha = death
gen agea = age
gen coastmina = coastmin

// Remove EVERY un-suffixed state-level variable before State B is merged in.
// FIX: death, age and coastmin were missing from this list (the line ended in a
// dangling "///"). Because -merge- keeps master values for variables that
// already exist, leaving them in place made deathb, ageb and coastminb silent
// copies of State A's values.
drop id area population /*waryears*/ growthwarbreckearea growthpeacebreckearea ///
     shrinkwarbreckearea shrinkpeacebreckearea elevationsd death age coastmin

// Merge in state-level data for State B (keyed on idb instead of ida)
generate id = idb
sort id year
cd $ROOT
cd $INTERMEDIATEDIR
merge id year using statedata_intermediate
drop _merge

// NOTE(review): old-style -merge- keeps the master copy of any variable that is
// already in memory, so the un-suffixed variables read below only carry State B
// values if they were dropped after the State A merge - confirm for
// death, age and coastmin.

// Side-B copies of the state-level variables
generate areab  = area
generate popb   = population
generate lareab = log(areab)
generate lpopb  = log(popb)
foreach s in growthwar growthpeace shrinkwar shrinkpeace {
    generate `s'b = `s'breckearea
}
generate elevsdb   = elevationsd
generate deathb    = death
generate ageb      = age
generate coastminb = coastmin

// Clean up the un-suffixed state-level variables
drop id area population ///
     growthwarbreckearea growthpeacebreckearea ///
     shrinkwarbreckearea shrinkpeacebreckearea ///
     elevationsd death age coastmin


// Growth and shrink variables: declare the dyad panel, zero-fill, then build
// lagged log transforms.
xtset idd year

local gsvars growthwara growthpeacea growthwarb growthpeaceb ///
             shrinkwara shrinkpeacea shrinkwarb shrinkpeaceb

// Missing gains and losses are recoded to zero (growth vars first, then shrink vars)
foreach v of local gsvars {
    replace `v' = 0 if `v' == .
}

// ll<var> = log(1 + value of <var> five years earlier)
foreach v of local gsvars {
    generate ll`v' = log(l5.`v' + 1)
}

// War and peace components combined: total growth of A and total shrinkage of B
generate llgrowtha = log(l5.growthwara + l5.growthpeacea + 1)
generate llshrinkb = log(l5.shrinkwarb + l5.shrinkpeaceb + 1)


 // Netgain: gainaarea minus lossaarea, with missing components set to zero
 replace gainaarea = 0 if gainaarea == .
 replace lossaarea = 0 if lossaarea == .
 drop netgain
 generate netgain = gainaarea - lossaarea

 // netgain0 keeps only non-negative net gains; netloss0 keeps only the size of net losses
 generate netgain0 = cond(netgain < 0, 0, netgain)
 generate netloss0 = cond(netgain > 0 & netgain != ., 0, abs(netgain))

 // DVs...
 generate lnga = log(netgain0 + 1)
 generate lnla = log(netloss0 + 1)
 generate nga1 = (netgain0 > 100)

 // Conflict variables: inc1 is 0 when incidencebrecke is 0, 1 when it is
 // positive and non-missing, and missing otherwise
 drop inc1
 generate inc1 = cond(incidencebrecke == 0, 0, ///
                 cond(incidencebrecke > 0 & incidencebrecke != ., 1, .))

 // Size of the net territorial change regardless of sign, its log, and a dummy
 generate lnga2 = log(abs(netgain) + 1)
 generate nga21 = (lnga2 > 0 & lnga2 != .)
 generate nga2  = abs(netgain)

  
 xtset idd year

 // DYADIC ANALYSIS
 // Five-year lags: lneigh, lldist, llareaa and llareab
 foreach v in neigh ldist lareaa lareab {
     generate l`v' = l5.`v'
 }

 // Variable labels
 label var lnga           "log net terr. gain"
 label var lneigh         "neighbors"
 label var lldist         "log distance"
 label var inc1           "conflict"
 label var llareaa        "log size A"
 label var llareab        "log size B"
 label var llgrowthwara   "log cumul. war gains A"
 label var llgrowthpeacea "log cumul. peace gains A"
 
 

// The following code was previously in the data_dyad_LEC_v4 file
//
// Relational measures for State A, aggregated over all dyads that share
// ida and year. Used as control variables.

xtset idd year
generate lagareaa = l5.areaa
generate lagareab = l5.areab

// Lagged neighbour count of A and its log
bysort ida year: egen lnumneighsa = total(lneigh)
generate llnumneighsa = log(lnumneighsa + 1)

// Summed lagged area of A's lagged neighbours
bysort ida year: egen lsumneighareaa = total(lneigh*lagareab)
replace lsumneighareaa = 0 if lsumneighareaa == .

// Largest lagged neighbour area, its log, and A's size relative to it
bysort ida year: egen lmaxneighareaa = max(lneigh*lagareab)
replace lmaxneighareaa = 0 if lmaxneighareaa == .
generate llmaxneighareaa = log(lmaxneighareaa + 1)
generate llrthreata = log(lagareaa)/log(lagareaa + lmaxneighareaa)
generate lrmaxnb    = lagareaa/(lagareaa + lmaxneighareaa)

// Same ratio against the mean and the minimum of lneigh*lagareab.
// NOTE(review): the product is 0 for dyads with lneigh == 0, so those rows
// enter the mean and the minimum as zeros.
foreach stat in mean min {
    bysort ida year: egen l`stat'neighareaa = `stat'(lneigh*lagareab)
    replace l`stat'neighareaa = 0 if l`stat'neighareaa == .
    generate lr`stat'nb = lagareaa/(lagareaa + l`stat'neighareaa)
}
 
 
  // Lagged neighbour count of State B (summed over all dyads sharing idb and
  // year); only its log, llnumneighsb, is kept.
  bys idb year: egen lnumneighsb = sum(lneigh)
  gen llnumneighsb = log(lnumneighsb+1)

  // lnumneighsb is then redefined as the log of B's CURRENT neighbour count.
  // FIX: the count was aggregated "bys ida year", which produces State A's
  // neighbour count under a B-suffixed name; every other *b aggregate in this
  // section groups by idb, so this one now does too.
  drop lnumneighsb
  bys idb year: egen numneighsb = sum(neigh)
  gen lnumneighsb = log(numneighsb+1)

 

 // Summed lagged area of the states paired with B that were neighbours five years earlier
 bysort idb year: egen lsumneighareab = total(lneigh*lagareaa)
 replace lsumneighareab = 0 if lsumneighareab == .

 // Largest such area, its log, and B's log size relative to the combined log size
 bysort idb year: egen lmaxneighareab = max(lneigh*lagareaa)
 replace lmaxneighareab = 0 if lmaxneighareab == .
 generate llmaxneighareab = log(lmaxneighareab + 1)
 generate llrthreatb = log(lagareab)/log(lagareab + lmaxneighareab)
 
 xtset idd year

 // log(1 + elevation s.d.) five years earlier, for A and for B
 foreach s in a b {
     generate llelevsd`s' = log(l5.elevsd`s' + 1)
 }

 // Mean of elevsdb over all dyads sharing ida and year (no neighbour filter is
 // applied here); falls back to A's own value when that mean is missing
 bysort ida year: egen melevsdneigh = mean(elevsdb)
 replace melevsdneigh = elevsda if melevsdneigh == .

 // bysort changed the sort order; restore panel order before using the lag operator
 sort idd year
 generate llelevsdneigh = log(l5.melevsdneigh + 1)
 
 // Revised DVs: the gain DV is set to missing for net-loss dyad-years, and
 // the loss DV is set to missing for net-gain dyad-years
 generate lnga0 = cond(netgain < 0, ., lnga)
 generate lnla0 = cond(netgain > 0, ., lnla)

 // This measure is referred to as relative log size in the paper
 generate llr      = llareaa/log(l5.areaa + l5.areab)
 generate llrpop   = l5.lpopa/log(l5.popa + l5.popb)
 generate inc1Xllr = inc1 * llr

 // Relative log size of A against the summed lagged area of every state it is
 // paired with in that year
 bysort ida year: egen sumlareab = total(lagareab)
 generate llrnb = llareaa / log(lagareaa + sumlareab)


xtset idd year

// Dyadic relative measures built from values five years earlier:
// log(1 + own series) over log(e + A's series + B's series).
// The exp(1) offset keeps the denominator at 1 when both series are zero.
generate lrgrowthwarab   = log(l5.growthwara + 1)   / log(l5.growthwara   + l5.growthwarb   + exp(1))
generate lrgrowthpeaceab = log(l5.growthpeacea + 1) / log(l5.growthpeacea + l5.growthpeaceb + exp(1))
generate lrshrinkwarba   = log(l5.shrinkwarb + 1)   / log(l5.shrinkwara   + l5.shrinkwarb   + exp(1))
generate lrshrinkpeaceba = log(l5.shrinkpeaceb + 1) / log(l5.shrinkpeacea + l5.shrinkpeaceb + exp(1))

// Compute dummy DVs
// nga1 replaces the earlier threshold-based version: 1 for any positive net gain
drop nga1
generate nga1 = (netgain > 0 & netgain != .)
generate nla1 = (netgain < 0 & netgain != .)

// event = any net territorial change; btscs builds the noeventyear counter and
// three spline variables (_spline1-_spline3), renamed here to evspline1-evspline3
drop event
generate event = (nga1 == 1 | nla1 == 1)
drop noeventyear
btscs event year idd, gen(noeventyear) nspline(3)
drop evspline*
forvalues k = 1/3 {
    rename _spline`k' evspline`k'
}

// Versions of the dummies that are missing when the change goes the other way
generate nga10 = cond(netgain < 0 & netgain != ., ., nga1)
generate nla10 = cond(netgain > 0 & netgain != ., ., nla1)

// Save intermediate analysis data
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_analysis1, replace
// save dyad_analysis1warlag, replace


///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Aggregation to country level

// Split each dyad-year's net gain and net loss by conflict status (inc1)
generate dyad_wargains    = inc1*netgain0
generate dyad_peacegains  = (1-inc1)*netgain0
generate dyad_warlosses   = inc1*netloss0
generate dyad_peacelosses = (1-inc1)*netloss0

// One row per State A and year: gains/losses are summed over its dyads, the
// neighbourhood measures are taken as the maximum across its dyads
collapse (sum) dyad_wargains dyad_peacegains dyad_warlosses dyad_peacelosses ///
         (max) llnumneighsa lnumneighsa llrthreata lr*nb, by(ida year)

xtset ida year
rename ida id

// Save country-level variables that are used in country-level analysis
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_cumul.dta, replace


///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reload main analysis data again
cd $ROOT
cd $INTERMEDIATEDIR
use dyad_analysis1, clear
// use dyad_analysis1warlag, clear


// Merge in country level data for States A and B.
// For each side: key the dyad on that state's id, merge the country-year totals
// from dyad_cumul, copy them into side-suffixed variables, drop the un-suffixed
// originals, and recode missing totals to zero.
//
// FIX: -merge- keeps the master copy of any variable already in memory. The
// side-A clean-up previously left dyad_wargains behind, so the side-B merge
// could not overwrite it and dyad_wargainsb was a copy of State A's war gains.
// All four un-suffixed totals are now dropped after each merge.
local totals dyad_wargains dyad_peacegains dyad_warlosses dyad_peacelosses

foreach s in a b {
    gen id = id`s'
    merge m:1 id year using dyad_cumul
    foreach v of local totals {
        gen `v'`s' = `v'
    }
    drop id _merge `totals'
    foreach v of local totals {
        replace `v'`s' = 0 if `v'`s' == .
    }
}

xtset idd year

// Five-year lags of B's growth and shrink series
foreach v in growthwarb growthpeaceb shrinkwarb shrinkpeaceb {
    generate lag`v' = l5.`v'
}

// Sum of those lagged series over A's lagged neighbours, per State A and year
foreach s in growthwar growthpeace shrinkwar shrinkpeace {
    bysort ida year: egen nbsum_`s' = total(lneigh*lag`s'b)
}

// bysort changed the sort order; re-declare the panel before using lag operators
xtset idd year

// A's lagged series relative to itself plus the neighbourhood sum
// (log(1 + own) over log(e + own + neighbours))
generate lrwargrowthnb   = log(l5.growthwara + 1)/log(l5.growthwara + nbsum_growthwar + exp(1))
generate lrpeacegrowthnb = log(l5.growthpeacea + 1)/log(l5.growthpeacea + nbsum_growthpeace + exp(1))
generate lrwarshrinknb   = log(l5.shrinkwara + 1)/log(l5.shrinkwara + nbsum_shrinkwar + exp(1))
generate lrpeaceshrinknb = log(l5.shrinkpeacea + 1)/log(l5.shrinkpeacea + nbsum_shrinkpeace + exp(1))

// Analysis data including onset etc.
// onsetinita1 flags dyad-years with a positive, non-missing onsetinitiatorabrecke
generate onsetinita1 = (onsetinitiatorabrecke > 0 & onsetinitiatorabrecke != .)

// btscs builds the pyis counter and three spline variables, renamed to pyispline1-pyispline3
btscs onsetinita1 year idd, gen(pyis) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' pyispline`k'
}



// Conflict incidence in which A is coded as initiator
gen incinita1 = 0
replace incinita1 = 1 if inc1==1 & incidenceinitiatorabrecke > 0 & incidenceinitiatorabrecke !=.

// ... widened to dyad-years with a positive incidencefollowerabrecke
gen incinitsidea1 = incinita1
replace incinitsidea1 = 1 if incidencefollowerabrecke > 0 & incidencefollowerabrecke !=.

// Conflict incidence in which B is coded neither as initiator nor as follower.
// FIX: Stata treats missing as larger than any number, so the unguarded
// "incidencebrecke>0" was also true for a missing incidencebrecke. The explicit
// missing check matches how inc1 is coded from the same variable.
gen incattackedb1 = 0
replace incattackedb1 = 1 if incidencebrecke > 0 & incidencebrecke != . ///
    & incidenceinitiatorbbrecke==0 & incidencefollowerbbrecke==0

// Conflict dummy interacted with the lagged log growth/shrink levels
generate inc1Xgrowthwara   = inc1 * llgrowthwara
generate inc1Xgrowthpeacea = inc1 * llgrowthpeacea
generate inc1Xshrinkwarb   = inc1 * llshrinkwarb
generate inc1Xshrinkpeaceb = inc1 * llshrinkpeaceb

// Conflict dummy interacted with the dyadic relative measures
foreach r in growthwarab growthpeaceab shrinkwarba shrinkpeaceba {
    generate inc1X`r' = inc1 * lr`r'
}

// Log of lagged age (age*5 + 5) for A and B
foreach s in a b {
    generate llage`s' = log(l5.age`s'*5 + 5)
}

// log(1 + lagged coastmin) for A and B
foreach s in a b {
    generate llcoastdist`s' = log(l5.coastmin`s' + 1)
}

// Coastal dummies: 1 when the current (unlagged) coastmin is below 10
foreach s in a b {
    generate lcoastal`s' = (coastmin`s' < 10)
}

// A's share of the dyad total, five years earlier: area, war growth, peace growth
generate lr      = l5.areaa/(l5.areaa + l5.areab)
generate lrwar   = l5.growthwara / (l5.growthwara + l5.growthwarb)
generate lrpeace = l5.growthpeacea / (l5.growthpeacea + l5.growthpeaceb)

// Save main analysis data
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_analysis2, replace


///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Again aggregate analysis data to country level

// One row per State A and year, holding the maximum of every dyad_* and lr*nb variable
collapse (max) dyad_* lr*nb, by(ida year)
rename ida id
save cumulneigh, replace

// Leave the dyad-level analysis data in memory for whatever runs next
cd $ROOT
cd $INTERMEDIATEDIR
use dyad_analysis2, clear

// The first intermediate file is no longer needed
erase dyad_analysis1.dta
